import urllib.request
from bs4 import BeautifulSoup
import json
from 查找没用的图片 import printList
import time
import re
def getData(x):
    """Fetch page *x* of the Shaanxi enterprise-library search results
    and parse the visible table cells into rows.

    Parameters
    ----------
    x : int
        Page number substituted into the ``pageNumber`` query parameter.

    Returns
    -------
    list[list[str]]
        One inner list per ``<tr>`` in ``#enterpriseLibraryIsHides``.
        Each holds the visible cell texts (preferring a ``title``
        attribute over the cell text), plus one trailing element: the
        argument string captured from the row link's ``vie1(...)``
        onclick handler, or ``""`` when no such link exists.
    """
    url = (
        "http://jzscyth.shaanxi.gov.cn:7001/PDR/network/informationSearch/"
        "informationSearchList?name=&type=&certDeadline=&certReviewUnit="
        "&regType=&pid1=610000&pid2=&pid3="
        f"&pageNumber={x}&libraryName=enterpriseLibrary"
    )
    # Hoisted out of the row loop: compile the onclick pattern once per call.
    vie1_pattern = re.compile(r'vie1\((.*)\)')
    # `with` guarantees the HTTP response is closed; the original leaked it.
    with urllib.request.urlopen(url) as resp:
        soup = BeautifulSoup(resp.read().decode('utf-8'), features="html.parser")
    datas = []
    for tr in soup.select("#enterpriseLibraryIsHides tr"):
        cols = []
        for td in tr.findAll("td"):
            # Skip cells hidden via an inline "display: none" style.
            if td.has_attr("style") and td["style"].find("display: none") != -1:
                continue
            if td.has_attr("title"):
                cols.append(td["title"])
            else:
                cols.append(td.getText().strip())
        # The row's detail link encodes an id as vie1(<id>) in onclick;
        # append the captured argument, or "" when absent.
        detail = ""
        a = tr.select_one("td a")
        if a is not None and a.has_attr("onclick") and a["onclick"].startswith("vie1"):
            matches = vie1_pattern.findall(a["onclick"])
            if matches:
                detail = matches[0]
        cols.append(detail)
        datas.append(cols)
    return datas
def saveDatas():
    """Download result pages and dump each one as JSON to its own file.

    For every page number in the (currently single-page) range, fetches
    the parsed rows via :func:`getData`, serializes them with
    ``json.dumps``, and writes the text to
    ``/Users/wangshuguan/陕西省/陕西省<page>.txt`` (UTF-8), sleeping one
    second between pages to avoid hammering the server.  Prints each
    page number as simple progress output.
    """
    for page in range(1005, 1006):
        payload = json.dumps(getData(page))
        # `with` closes the file even if write() raises; the original
        # used an unguarded open()/close() pair inside a redefined-per-
        # iteration helper function.
        with open(f'/Users/wangshuguan/陕西省/陕西省{page}.txt', 'w',
                  encoding='utf-8') as fh:
            fh.write(payload)
        time.sleep(1)  # throttle: be polite to the remote server
        print(page)

# Entry point: run the scrape only when executed as a script, not when
# this module is imported (the original called saveDatas() at import
# time as a module-level side effect).
if __name__ == "__main__":
    saveDatas()
    